import sys
import java.io.FileReader as FileReader
import weka.core.Instances as Instances
import weka.clusterers.SimpleKMeans as KM
import weka.filters.unsupervised.attribute.StringToWordVector as STW
import weka.filters.Filter as Filter

import weka.clusterers.EM as EM

# Load the ARFF dataset. The reader is closed as soon as the Instances
# object has been constructed, so the file handle is released even if
# parsing the file raises.
file = FileReader("text.arff")
try:
    data = Instances(file)
finally:
    file.close()

# Pass the dataset through a StringToWordVector filter (default options).
# setInputFormat() is given the loaded dataset before the filter is applied.
filter = STW()
filter.setInputFormat(data)
dataFiltered = Filter.useFilter(data, filter)

# Build a SimpleKMeans clusterer (default options) on the filtered data.
km = KM()
km.buildClusterer(dataFiltered)
# print km

# Build an EM clusterer (default options) on the same filtered data.
em = EM()
em.buildClusterer(dataFiltered)

